You are given the MODIS LAI data files for the year 2012 in the directory files/data for the UK (MODIS tile .
Read the LAI datasets into a masked array, using QA bit 0 to mask the data (i.e. good quality data only) and generate a movie of LAI.
You ought be getting familiar with this sort of problem, as it is very common.
Since we have the code in the main notes to read a single data dataset:
import gdal # Import GDAL library bindings
import numpy as np
# The file that we shall be using
# Needs to be on current directory
filename = 'files/data/MCD15A2.A2011185.h09v05.005.2011213154534.hdf'
g = gdal.Open(filename)
# g should now be a GDAL dataset, but if the file isn't found
# g will be none. Let's test this:
if g is None:
print "Problem opening file %s!" % filename
else:
print "File %s opened fine" % filename
subdatasets = g.GetSubDatasets()
for fname, name in subdatasets:
print name
print "\t", fname
# Let's create a list with the selected layer names
selected_layers = [ "Lai_1km", "FparLai_QC" ]
# We will store the data in a dictionary
# Initialise an empty dictionary
data = {}
# for convenience, we will use string substitution to create a
# template for GDAL filenames, which we'll substitute on the fly:
file_template = 'HDF4_EOS:EOS_GRID:"%s":MOD_Grid_MOD15A2:%s'
for i, layer in enumerate ( selected_layers ):
this_file = file_template % ( filename, layer )
print "Opening Layer %d: %s" % (i+1, this_file )
g = gdal.Open ( this_file )
if g is None:
raise IOError
data[layer] = g.ReadAsArray()
print "\t>>> Read %s!" % layer
# scale the LAI
lai = data['Lai_1km'] * 0.1
# pull out the QC
qc = data['FparLai_QC']
# find bit 0
qc = qc & 1
# generate the masked array
laim = np.ma.array ( lai, mask=qc )
We can use this as a function that reads a single dataset:
def read_modis_lai(filename):
''' Read MODIS LAI data from filename
and return masked array
'''
g = gdal.Open(filename)
# g should now be a GDAL dataset, but if the file isn't found
# g will be none. Let's test this:
if g is None:
print "Problem opening file %s!" % filename
else:
print "File %s opened fine" % filename
subdatasets = g.GetSubDatasets()
#for fname, name in subdatasets:
#print name
#print "\t", fname
# Let's create a list with the selected layer names
selected_layers = [ "Lai_1km", "FparLai_QC" ]
# We will store the data in a dictionary
# Initialise an empty dictionary
data = {}
# for convenience, we will use string substitution to create a
# template for GDAL filenames, which we'll substitute on the fly:
file_template = 'HDF4_EOS:EOS_GRID:"%s":MOD_Grid_MOD15A2:%s'
for i, layer in enumerate ( selected_layers ):
this_file = file_template % ( filename, layer )
#print "Opening Layer %d: %s" % (i+1, this_file )
g = gdal.Open ( this_file )
if g is None:
raise IOError
data[layer] = g.ReadAsArray()
#print "\t>>> Read %s!" % layer
# scale the LAI
lai = data['Lai_1km'] * 0.1
# pull out the QC
qc = data['FparLai_QC']
# find bit 0
qc = qc & 1
# generate the masked array
laim = np.ma.array ( lai, mask=qc )
return laim
and then loop:
import glob
year = 2012
tile = 'h17v03'
files = np.sort(glob.glob('files/data/MCD15A2.A%d*.%s.*.hdf'%(year,tile)))
lai = []
for f in files:
lai.append(read_modis_lai(f))
# force it to be a masked array
lai = np.ma.array(lai)
# plot the data
import pylab as plt
# work out a consistent scaling
lai_max = np.max(lai)
for i,f in enumerate(files):
plt.figure(figsize=(7,7))
plt.imshow(lai[i],interpolation='none',vmin=0.,vmax=lai_max*0.75)
# remember filenames of the form
# files/data/MCD15A2.A2011185.h09v05.005.2011213154534.hdf'
file_id = f.split('/')[-1].split('.')[-5][1:]
print file_id
# plot a jpg
plt.title(file_id)
plt.colorbar()
plt.savefig('files/images/lai_uk_%s.jpg'%file_id)
# now make a movie ...
import os
cmd = 'convert -delay 100 -loop 0 files/images/lai_uk_*.jpg files/images/lai_uk2.gif'
os.system(cmd)
We have now dowloaded a different dataset, the MOD10A product, which is the 500 m MODIS daily snow cover product, over the UK.
This is a good opportunity to see if you can apply what was learned above about interpreting QC information and using gdal to examine a dataset.
If you examine the data description page, you will see that the data are in HDF EOS format (the same as the LAI product).
%%bash
tile=h17v03
year=2012
type=MOST
month=02
file=robot_snow.${year}_${type}_${tile}_${month}.txt
grep $tile < files/data/robot_snow.$year.txt | grep $type | grep "${year}\.${month}" > files/data/$file
wc -l files/data/$file
# cd temporarily to the local directory
pushd files/data
# -nc : no clobber : dont download if its there already
# -nH --cut-dirs=3 : ignore the directories
wget -nc -i $file -nH --cut-dirs=3
# cd back again
popd
echo $file
We explore the dataset in the same way as above:
# how to find out which datasets are in the file
import gdal # Import GDAL library bindings
# The file that we shall be using
# Needs to be on current directory
filename = 'files/data/MOD10A1.A2012060.h17v03.005.2012062064750.hdf'
g = gdal.Open(filename)
# g should now be a GDAL dataset, but if the file isn't found
# g will be none. Let's test this:
if g is None:
print "Problem opening file %s!" % filename
else:
print "File %s opened fine" % filename
subdatasets = g.GetSubDatasets()
for fname, name in subdatasets:
print name
print "\t", fname
This is very nearly the same as the LAI dataset reading.
We need obviously to select a different layer name.
We should also notice that the file_template will be slightly different (look at the list of subsets above).
# How to access specific datasets in gdal
# Let's create a list with the selected layer names
selected_layers = [ "Fractional_Snow_Cover" ]
# We will store the data in a dictionary
# Initialise an empty dictionary
data = {}
# for convenience, we will use string substitution to create a
# template for GDAL filenames, which we'll substitute on the fly:
file_template = 'HDF4_EOS:EOS_GRID:"%s":MOD_Grid_Snow_500m:%s'
# This has two substitutions (the %s parts) which will refer to:
# - the filename
# - the data layer
for i, layer in enumerate ( selected_layers ):
this_file = file_template % ( filename, layer )
print "Opening Layer %d: %s" % (i+1, this_file )
g = gdal.Open ( this_file )
if g is None:
raise IOError
data[layer] = g.ReadAsArray()
print "\t>>> Read %s!" % layer
plt.imshow(data["Fractional_Snow_Cover"])
plt.colorbar()
plt.title('% snow cover')
The data description page tells us that values of 237 239 will indicate whether the data are ocean or inland water bodies. These are what you should use to build the water mask.
snow = data["Fractional_Snow_Cover"]
water = (snow == 239)
plt.imshow(water)
plt.colorbar()
plt.title('water mask')
snow = data["Fractional_Snow_Cover"]
valid_mask = (snow > 100)
plt.imshow(valid_mask)
plt.colorbar()
plt.title('valid mask')
You should be used to this sort of thing by now.
First, lets sort the filenames:
# filenames from the file
url_file = 'files/data/robot_snow.2012_MOST_h17v03_02.txt'
# open the file for read
fp = open(url_file,'r')
# the strip is to get rid of the \n character
filenames = [f.split('/')[-1].strip() for f in fp.readlines()]
# close the file
fp.close
# lets see what we got
print filenames
Using glob you might try:
# using glob
from glob import glob
filenames = glob('files/data/MOD10A1.A2012*.hdf')
print filenames
But that isn't refined enough (i.e. it won't only pull files for February if there are more).
So, we need to pull files from doy 32 to 60 inclusive ... which is a lot more tricky ...
# using glob
from glob import glob
filenames = glob('files/data/MOD10A1.A201203[2-9].*.hdf') +\
glob('files/data/MOD10A1.A201204?.*.hdf') +\
glob('files/data/MOD10A1.A201205?.*.hdf') +\
glob('files/data/MOD10A1.A2012060.*.hdf')
print filenames
Practically, you might be better off reading an interpreting the url file as above, or alternatively, put all of the February files in a different directory and glob that ...
# in any case, we should sort the filenames
filenames = sort(filenames)
Next, let's define a function to read a single file into a numpy array (and simplify what we have above a bit):
import numpy.ma as ma
def read_snow(filename):
layers = "Fractional_Snow_Cover"
# for convenience, we will use string substitution to create a
# template for GDAL filenames, which we'll substitute on the fly:
file_template = 'HDF4_EOS:EOS_GRID:"%s":MOD_Grid_Snow_500m:%s'
# This has two substitutions (the %s parts) which will refer to:
# - the filename
# - the data layer
this_file = file_template % ( filename, layer )
g = gdal.Open ( this_file )
if g is None:
raise IOError
snow = g.ReadAsArray()
# dont use this here, but just in case useful
#water = (snow == 239)
valid_mask = (snow > 100)
return ma.array(snow,mask=valid_mask)
Now, test it:
snow = read_snow(filenames[0])
plt.imshow(snow,vmax=100)
plt.colorbar()
plt.title(filenames[0])
Now its just a loop as in several previous examples:
snow = []
for f in filenames:
snow.append(read_snow(f))
snow = ma.array(snow)
# or neater ...
snow = ma.array([read_snow(f) for f in filenames])
print snow.shape
# now plot and save images
# plot the data
import pylab as plt
cmap = plt.cm.PuBu
for i,f in enumerate(filenames):
plt.figure(figsize=(7,7))
plt.imshow(snow[i],cmap=cmap,interpolation='none',vmin=0.,vmax=100)
# remember filenames of the form
# files/data/MCD15A2.A2011185.h09v05.005.2011213154534.hdf'
file_id = f.split('/')[-1].split('.')[-5][1:]
print file_id
# plot a jpg
plt.title(file_id)
plt.colorbar()
plt.savefig('files/images/snow_uk_%s.jpg'%file_id)
# now make a movie ...
import os
cmd = 'convert -delay 100 -loop 0 files/images/snow_uk_*.jpg files/images/snow_uk2.gif'
os.system(cmd)
Again, you should be used to this sort of thing by now.
We have the following code to base this on:
import sys
sys.path.insert(0,'files/python')
from raster_mask import raster_mask, getLAI
# test this on an LAI file
# the data file name
filename = 'files/data/MCD15A2.A2012273.h17v03.005.2012297134400.hdf'
# a layer (doesn't matter so much which: use for geometry info)
layer = 'Lai_1km'
# the full dataset specification
file_template = 'HDF4_EOS:EOS_GRID:"%s":MOD_Grid_MOD15A2:%s'
file_spec = file_template%(filename,layer)
# make a raster mask
# from the layer IRELAND in world.shp
mask = raster_mask(file_spec,\
target_vector_file = "files/data/world.shp",\
attribute_filter = "NAME = 'IRELAND'")
# get the LAI data
data = getLAI(filename)
# reset the data mask
# 'mask' is True for Ireland
# so take the opposite
data['Lai_1km'] = ma.array(data['Lai_1km'],mask=mask)
data['LaiStdDev_1km'] = ma.array(data['Lai_1km'],mask=mask)
plt.title('LAI for Eire: 2012273')
plt.imshow(data['Lai_1km'],vmax=6)
plt.colorbar()
Obviously, we will need to specify a set of filenames again.
These have a simple filename pattern, so glob would be appropriate (don't forget to sort:
from glob import glob
filenames = sort(glob('files/data/MCD15A2.A2012???.h17v03.005.*.hdf'))
print filenames
Now we just build a loop around that:
import sys
sys.path.insert(0,'files/python')
from raster_mask import raster_mask, getLAI
from glob import glob
# a layer (doesn't matter so much which: use for geometry info)
layer = 'Lai_1km'
# the full dataset specification
file_template = 'HDF4_EOS:EOS_GRID:"%s":MOD_Grid_MOD15A2:%s'
# list of filenames
filenames = sort(glob('files/data/MCD15A2.A2012???.h17v03.005.*.hdf'))
# build a mask: use filenames[0] for geometry
print 'building mask ...'
mask = raster_mask(file_template%(filenames[0],layer),\
target_vector_file = "files/data/world.shp",\
attribute_filter = "NAME = 'IRELAND'")
lai = []
print 'looping over files...'
for filename in filenames:
# get the LAI data
print filename
data = getLAI(filename)
# reset the data mask
lai.append(ma.array(data['Lai_1km'],mask=mask))
# convert to masked array
lai = ma.array(lai)
# make sure we can load it, then plot it
import numpy.ma as ma
import numpy as np
# now plot and save images
# plot the data
import pylab as plt
cmap = plt.cm.Greens
for i,f in enumerate(filenames):
plt.figure(figsize=(7,7))
plt.imshow(lai[i],cmap=cmap,interpolation='none',vmin=0.,vmax=6.)
# remember filenames of the form
# files/data/MCD15A2.A2011185.h09v05.005.2011213154534.hdf'
file_id = f.split('/')[-1].split('.')[-5][1:]
print file_id
# plot a jpg
plt.title(file_id)
plt.colorbar()
plt.savefig('files/images/lai_eire_%s.jpg'%title)
# now make a movie ...
import os
cmd = 'convert -delay 100 -loop 0 files/images/lai_eire_*.jpg files/images/lai_eire2.gif'
os.system(cmd)